// re2zig $INPUT -o $OUTPUT

const std = @import("std");

/// Capacity of the lexer input buffer in bytes, not counting the one extra
/// byte that the buffer reserves for the sentinel.
const bufsize = 4095;
/// Marker stored in a tag variable that holds no input position.
const none = std.math.maxInt(usize);

/// Alias for the syntax error value. Not referenced in the visible part of
/// this file, where the error is spelled out directly.
const err = error.SyntaxError;

/// One parsed version number of the form major.minor or major.minor.patch.
const SemVer = struct {
    /// First numeric component.
    major: u32,
    /// Second numeric component.
    minor: u32,
    /// Third numeric component; the parser stores 0 when it is absent.
    patch: u32,
};

/// Convert a pre-parsed string of decimal digits to a number.
/// The caller guarantees that every byte is an ASCII digit and that the
/// value fits in u32; an empty string yields 0.
fn s2n(str: []const u8) u32 {
    var value: u32 = 0;
    var i: usize = 0;
    while (i < str.len) : (i += 1) {
        // Offset from the ASCII code of zero gives the numeric digit.
        const digit: u8 = str[i] - '0';
        value = value * 10 + digit;
    }
    return value;
}

/// Lexer state shared between parse and fill.
const State = struct {
    /// Input source that fill reads new data from.
    file: *std.Io.Reader,
    /// Input buffer; the extra byte beyond bufsize leaves room for the sentinel.
    yyinput: [bufsize + 1]u8,
    /// Offset of the current lexer position in yyinput.
    yycursor: usize,
    /// Saved input offset maintained by the generated lexer (presumably the
    /// backtrack position; confirm against the re2c documentation).
    yymarker: usize,
    /// Offset one past the last valid input byte; yyinput[yylimit] holds the sentinel.
    yylimit: usize,
    /// Offset in yyinput where the current lexeme starts.
    token: usize,
    // Intermediate tag variables must be part of the lexer state passed to YYFILL.
    // They don't correspond to tags and should be autogenerated by re2c.
    %{stags format = "@@: usize,\n"; %}
    /// Set by fill once a read returns less data than requested.
    eof: bool
};

/// Refill the input buffer: discard everything before the current lexeme,
/// move the remaining data to the start of the buffer and read more input
/// from the file into the freed space.
///
/// Returns 0 on success, -1 if the end of input was already reached, and
/// -2 if the current lexeme starts at offset 0, so nothing can be discarded.
fn fill(st: *State) i32 {
    if (st.eof) { return -1; } // unexpected EOF

    // Error: lexeme too long. In real life can reallocate a larger buffer.
    if (st.token < 1) { return -2; }

    // Shift buffer contents (discard everything up to the current token).
    // The destination starts below the source, so when the two ranges
    // overlap the copy has to run front to back. A back to front copy would
    // overwrite source bytes before they are read.
    std.mem.copyForwards(
        u8, st.yyinput[0..st.yylimit - st.token], st.yyinput[st.token..st.yylimit]);
    st.yycursor -= st.token;
    // Wrapping subtraction: does not trap when yymarker is below token.
    st.yymarker = @subWithOverflow(st.yymarker, st.token)[0];
    st.yylimit -= st.token;
    // Tag variables need to be shifted like other input positions. The check
    // for NONE is only needed if some tags are nested inside of alternative or
    // repetition, so that they can have NONE value.
    %{stags format = "if (st.@@ != none) st.@@ = @subWithOverflow(st.@@, st.token)[0];\n"; %}
    st.token = 0;

    // Fill free space at the end of buffer with new data from file.
    // A read error counts as zero bytes read, which the check below then
    // reports as end of input.
    st.yylimit += st.file.readSliceShort(st.yyinput[st.yylimit..bufsize]) catch 0;
    st.yyinput[st.yylimit] = 0; // append sentinel symbol

    // If read less than expected, this is the end of input.
    st.eof = st.yylimit < bufsize;

    return 0;
}

/// Lex the whole input and collect one SemVer per matched line.
///
/// On success the caller owns the returned list and must release it with
/// deinit(std.testing.allocator). Returns error.SyntaxError when the input
/// does not match the version grammar, or an allocation error from the list.
/// On any error the partially built list is freed before returning.
fn parse(st: *State) !std.ArrayList(SemVer) {
    var vers = try std.ArrayList(SemVer).initCapacity(std.testing.allocator, 0);
    // Release the list on every error return; the success path hands it to the caller.
    errdefer vers.deinit(std.testing.allocator);

    // Final tag variables available in semantic action.
    %{svars format = "var @@: usize = 0;\n"; %}

    loop: while (true) {
        st.token = st.yycursor;
        %{
            re2c:api = record;
            re2c:eof = 0;
            re2c:tags = 1;
            re2c:yyrecord = st;
            re2c:YYFILL = "fill(st) == 0";

            num = [0-9]+;

            num @t1 "." @t2 num @t3 ("." @t4 num)? [\n] {
                try vers.append(std.testing.allocator, SemVer {
                    .major = s2n(st.yyinput[st.token..t1]),
                    .minor = s2n(st.yyinput[t2..t3]),
                    .patch = if (t4 == none) 0 else s2n(st.yyinput[t4..st.yycursor - 1]),
                });
                continue :loop;
            }
            $ { return vers; }
            * { return error.SyntaxError; }
        %}
    }
}

// End-to-end check: write an input file several times larger than the lexer
// buffer, lex it through fill and parse, and compare against the expected list.
test {
    const fname = "input";
    const content = "1.22.333\n" ** bufsize;

    // Prepare input file: a few times the size of the buffer, containing
    // one version string per line.
    var fw = try std.fs.cwd().createFile(fname, .{});
    // Best-effort removal of the input file if anything below fails; the
    // success path deletes it explicitly at the end.
    errdefer std.fs.cwd().deleteFile(fname) catch {};
    {
        defer fw.close();
        try fw.writeAll(content);
    }

    {
        // Prepare lexer state: all offsets are at the end of buffer.
        // Use unbuffered reader - lexer does its own buffering.
        const zerobuf: [0]u8 = undefined;
        var fr = try std.fs.cwd().openFile(fname, .{.mode = .read_only});
        defer fr.close();
        var reader = fr.reader(&zerobuf);
        var st = State{
            .file = &reader.interface,
            .yyinput = undefined,
            .yycursor = bufsize,
            .yymarker = bufsize,
            .yylimit = bufsize,
            .token = bufsize,
            %{stags format = ".@@ = none,\n"; %}
            .eof = false,
        };
        // Sentinel at `yylimit` offset is set to zero, which triggers YYFILL.
        st.yyinput[st.yylimit] = 0;

        // Manually construct expected result.
        var expect = [_]SemVer{SemVer{.major = 1, .minor = 22, .patch = 333}} ** bufsize;

        // Run the lexer. The list is freed before the file is closed, and
        // both happen whether or not the comparison succeeds.
        var result = try parse(&st);
        defer result.deinit(std.testing.allocator);
        try std.testing.expectEqualDeep(&expect, result.items);
    }

    // Cleanup: remove input file (memory and file handle are released above).
    try std.fs.cwd().deleteFile(fname);
}
